
source("3.2.1 EtaEst.R")
source("3.2.2 GetRatio.R")
source("3.2.3 GetMaxKGen.R")
source("3.2.4 GetK.R")
source("3.2.5 PMaxEstRough.R")
source("3.2.6 nLL.R")
source("3.2.7 MarginalRatio.R")

PMLE = function(data, cnt, K, seed, max.step = 500, thres = 1e-4,mle.warm=NULL){
    # max.step = 5;thres = 1e-4; cnt <- count #DEBUG
    
    ## First estimate eta
    eta.coef        = EtaEst(data, cnt, K)
    X.names         = paste("X",1:pL0,sep="")
    names(eta.coef) = c("gamma00","gamma01","gamma10","gamma11","gamma.",X.names)
    
    eta.coef        = matrix(eta.coef[c(1,6:(5+pL0), 2, 6:(5+pL0), 3, 6:(5+pL0),
                                 4, 6:(5+pL0), 5:(5+pL0))],,5)
    colnames(eta.coef) = c("00","01","10","11",".") 
    
    set.seed(seed)
    epsilon = 0.1
    # "a_k"
    theta.coef = runif(pL0+2,-epsilon, epsilon)
    phi.coef = runif(pL0+3,  -epsilon, epsilon)
    gop.coef = runif(pL0+1,  -epsilon, epsilon)
    if (!is.null(mle.warm)){
        theta.coef = mle.warm$theta.coef[c(1:2, 4:7)]
        phi.coef = mle.warm$phi.coef
        gop.coef = mle.warm$gop.coef[c(1, 4:7)]
    }
    names(theta.coef) = c("alpha0","alpha1",X.names)
    names(phi.coef)   = c("beta0","beta1","beta.",X.names)
    names(gop.coef)   = c("delta0",X.names)
    
    pars = list(theta.coef=theta.coef, phi.coef=phi.coef, gop.coef=gop.coef)
    
    Optim = function(par.update, fn, parn, pars, step, seed){
        optim(par.update, fn, parn=parn, pars = pars, 
              data = data, eta.coef = eta.coef, cnt = cnt,
              rep = 500, seed = seed, control=list(maxit=max.step))
              # ,method = "L-BFGS-B", lower = -10, upper = 10)
    } 
    
    ## Optimization 
    ptm = proc.time()
    Diff = function(x,y){
        x = pmax(pmin(x, 10), -10)
        y = pmax(pmin(y, 10), -10)
        return(sum((x-y)^2)/sum(x^2+thres))
    } 
    diff = thres + 1; step = 0
    while(diff > thres & step < max.step + 100){
        ptm1 = proc.time()
        step = step + 1
        diff.theta = diff.phi = 0
        # update theta
        opt.theta       = Optim(pars$theta.coef, nLL, "theta.coef", 
                                pars, step, seed)
        diff.theta      = Diff(opt.theta$par,pars$theta.coef)
        pars$theta.coef[1:length(pars$theta.coef)] = opt.theta$par
        
        # update phi
        opt.phi       = Optim(pars$phi.coef, nLL, "phi.coef", pars, step, seed)
        diff.phi      = Diff(opt.phi$par,pars$phi.coef)
        pars$phi.coef[1:length(pars$phi.coef)] = opt.phi$par
        
        # update gop
        opt.gop       = Optim(pars$gop.coef, nLL, "gop.coef", pars, step,seed)
        diff.gop      = Diff(opt.gop$par,pars$gop.coef)
        pars$gop.coef[1:length(pars$gop.coef)] = opt.gop$par
        
        diff = max(diff.theta, diff.phi, diff.gop) + diff / 10
        if(step %% 20 == 0) cat("This is the ", step, "th step. Diff = ", diff,
                               " nll = ", opt.gop$value, 
                               "; it takes ", (proc.time()-ptm1)[3], "s \n")
    }
    
    pars$nll = opt.gop$value
    
    pars$contrast = CalMarRatio(pars, data, cnt, eta.coef, rep = 500,seed=seed)
        
    pars$theta.coef = c(pars$theta.coef[1:2],NA,pars$theta.coef[3:(pL0+2)])
    pars$gop.coef   = c(pars$gop.coef[1],NA,NA,pars$gop.coef[2:(pL0+1)])
    pars$eta.coef   = t(matrix(c(eta.coef[1,c(1:2,5)],eta.coef[2:(pL0+1),1],
                        eta.coef[1,3:4], rep(NA,pL0+1)),,2))
    pars$time = (proc.time() - ptm)[3]
    # browser()
    # pars$n01  = nLL(0, "n01", pars, data, eta.coef, cnt, seed = seed)
    pars$n01  = NA
    return(pars)
}






